} else if (cosinus > 0 & k$A[1] < p1$omezeni[2]){
p1$omezeni[1] = max(p1$omezeni[1], k$A[1])
} else if (cosinus !=0) {
#if ((cosinus > 0 & k$A[1] > p1$omezeni[2]) | (cosinus < 0 & k$A[1] < p1$omezeni[1])){
pridat = FALSE
#}
} else {
l = data.frame(
n = s1,
u = pr1$u,
A = pr2$A - pr1$A
) %>%
LinRov()
temp = min(which(s2!=0))
if(s1[temp]/s2[temp] >= 0){
if (l$A[1] >= 0) {
pridat = FALSE
break
} else {
mazat = c(mazat, j)
}
} else {
if (l$A[1] < 0) {
return(list())
}
}
}
############################################################################################
cosinus = round(sum(pr2$u * s1),10) #/(sqrt(sum(pr2$u^2))*sqrt(sum(s1^2))) #####PRO ZJEDNODUSENI NUTNE POUZE ZNAMENKO STACI CITATEL!!!
# if(cosinus == -1) cosinus = 1
if (cosinus < 0 & k$A[2] > primky[[j]]$omezeni[1]){
primky[[j]]$omezeni[2] = min(primky[[j]]$omezeni[2], k$A[2])
} else if (cosinus > 0 & k$A[2] < primky[[j]]$omezeni[2]){
primky[[j]]$omezeni[1] = max(primky[[j]]$omezeni[1], k$A[2])
} else if (cosinus !=0) {
mazat = c(mazat, j)
}
######################################################################################################
}
}
}
if (!is.null(mazat)){
primky = primky[-mazat]
}
if(pridat){
primky = list.append(primky,p1)
}
if (length(primky) == 0 & tf) return(list())
#print(paste("delka",length(primky)))
}
if (length(primky) != 0){
mazat = NULL
for (i in 1:length(primky)) {
if (diff(primky[[i]]$omezeni) < 0.0000001)
mazat = c(mazat, i)
}
if (!is.null(mazat)){
primky = primky[-mazat]
}
}
return(primky)
}
obsah = function(primky){
if (length(primky) == 0) return(0)
vrcholy = data.frame()
for (i in 1:length(primky)){
for (j in 1:2){
k = primky[[i]]$rovnice$A+primky[[i]]$omezeni[j]*primky[[i]]$rovnice$u
vrcholy = rbind(vrcholy, k)
}
}
colnames(vrcholy) = c("x","y")
if (any(is.na(vrcholy) | abs(vrcholy) == Inf)){
s = Inf
} else {
vrcholy = round(vrcholy, 10)
minx = min(vrcholy$x)
x = min(which(vrcholy$x == minx))
poradi = c(x)
for (i in 1:(nrow(vrcholy)/2)) {
x = x + ((x%%2) -0.5)*2
stejny = which(vrcholy$x == vrcholy$x[x] & vrcholy$y == vrcholy$y[x])
x = stejny[stejny != x]
poradi = c(poradi, x)
}
while (poradi[1] != poradi[length(poradi)]) poradi = poradi[-length(poradi)]
vrcholy = vrcholy[poradi,]
miny = min(vrcholy$y)
maxx = max(vrcholy$x)
konec = min(which(vrcholy$x == maxx))
s1 = 0
for (i in 2:konec){
s1 = s1 + ((vrcholy$y[i-1] + vrcholy$y[i] ) / 2 - miny) * (vrcholy$x[i] - vrcholy$x[i-1])
}
s2 = 0
for (i in (1+konec):nrow(vrcholy)){
s2 = s2 + ((vrcholy$y[i-1] + vrcholy$y[i]) / 2 - miny) * (vrcholy$x[i-1] - vrcholy$x[i])
}
s = abs(s1-s2)
}
return(s)
}
################################################# nas pripad
dvojice = function(df){
nazvy = colnames(df)[-1]
f = strsplit(nazvy,"[..]")
t = data.frame(x=f[[1]][1], y=f[[1]][3])
for (i in 2:length(f)) t = rbind(t,data.frame(x=f[[i]][1],y=f[[i]][3]))
return(t)
}
obspodgr = function(x,y){ # x je energie
s = 0
for (i in 2:length(x)){
s = s + ((y[i-1] + y[i]) / 2) * (x[i] - x[i-1])
}
return(s)
}
save.image("C:/Users/Vendula Nechutová/Desktop/Diplomka prilohy/.Rhistory.RData")
library(rlist)
library(dplyr)
outliers <- function(input_df, file_names, n = 10, method = 'hcc') {
#n is number of lines
if (is.null(input_df) || dim(input_df)[1] == 0) {
return(list())
}
outliers = matrix(0, 1, 10)
# hcc = horizontal_contour_crossing
extract_hcc <- function(input_df, file_names, n){
counter = matrix(0, length(file_names), n)
for (file in 1:length(file_names)) {
data <- subset(input_df, fid == file_names[file])$value
if (length(data) > 1) {
data <- normalize(data, n)
data <- ceiling(data)
data <- data[c(TRUE, !diff(data) == 0)] #remove not changing data
for (i in 1:(length(data) - 1)) {
if (data[i] != data[i + 1]) {
to_increment <- data[i] : (data[i + 1])
counter[file, to_increment] <- counter[file, to_increment] + 1
}
}
}# else
#  constRows[file] = TRUE  #function is constant
}
return(counter)
}
extract_hch <- function(input_df, file_names, n){
counter = matrix(0, length(file_names), n)
for (file in 1:length(file_names)) {
data <- subset(input_df, fid == file_names[file])$value
if (length(data) > 1) {
data <- normalize(data, n)
data <- ceiling(data) + (1-sign(data))
tbl <- table(data)
counter[file, as.numeric(names(tbl))] <- as.vector(tbl)
}# else
#  constRows[file] = TRUE  #function is constant
}
return(counter)
}
if(method == 'hcc'){
counter <- extract_hcc(input_df, file_names, n)
}
# hch = horizontal_contour_histogram
if(method == 'hch'){
counter <- extract_hch(input_df, file_names, n)
}
#OUTLIER ANALYSIS
#counter = counter[!constRows, ]#remove constant functions(probably not relevant) and constant columns (all the vectors are the same in the parameter, so ot is not important for comparison)
counter = as.data.frame(counter[, !sapply(as.data.frame(counter), function(x)
all(x == x[1]))])
if (!ncol(counter) == 0) {
#if all rows are the same, there are no outliers
if (!ncol(counter) == 1) {
#counter = apply(counter, 2, function(x)
#  (x - min(x)) / (max(x) - min(x)) * 2 - 1)#normalize
PCA = prcomp(as.data.frame(counter),
center = FALSE,
scale. = FALSE)#PCA analysis
nov = as.matrix(counter) %*% PCA$rotation #new parameters made by PCA
if (det(cov(nov[, 1:2])) > 1e-40) {
score = mahalanobis(nov[, 1:2], colMeans(nov[, 1:2]), cov(nov[, 1:2])) #mahalanobis distance abnormality score
} else
score = abs(nov[, 2] - mean(nov[, 2])) #when not posibble to make m. distance because of similarity of rows, we use just PCA1-mean(PCA1)
} else
score = counter[, 1]
constRows = rep(FALSE, length(file_names))
score = arrange(data.frame(file_names[!constRows], score), score)
if (nrow(score) > 10) {
for (i in 0:9)
# find top 10 outlier pumps (in outliers matrix are pumps ID)
if (score[(nrow(score) - i), 2] > mean(score[(1:(nrow(score) - i - 1)), 2]))
outliers[i + 1] = as.character(score[(nrow(score) - i), 1])
} else
outliers = as.character(rev(score[order(score$score), 1]))
less = as.character(score[order(score$score), ][1, 1])
} else
less = 0
#browser()
list(outliers = outliers, less = less, score = score)
}
# Normalize vector of numbers to be between 0 and n
normalize <- function(v, n = 1) {
min <- min(v)
max <- max(v)
(v - min) / (max - min) * n
}
dtwMinLength = function (d1, d2) {
d1 = data.frame(n1 = 1:length(d1),d1 = d1)
d2 = data.frame(n2 = 1:length(d2),d2 = d2)
konst1 = which(diff(d1$d1)==0)+1
konst2 = which(diff(d2$d2)==0)+1
if (length(konst1) != 0){
konst1 = konst1[konst1 != nrow(d1)]
d1 = d1[-konst1,]
}
if (length(konst2) != 0){
konst2 = konst2[konst2 != nrow(d2)]
d2 = d2[-konst2,]
}
library(dtw)
alignment <- dtw(d1$d1, d2$d2, keep=TRUE)
d = data.frame(d2[alignment$index2,],d1[alignment$index1,])
for (i in konst2) d = rbind(d,data.frame(n2 = i,d[d$n2 == i-1,2:4]))
for (i in konst1) d = rbind(d,data.frame(n1 = i,d[d$n1 == i-1,c(1,2,4)]))
d$dist = abs(d$d1 - d$d2) #abs(d$d2 - d$d1)
#mindist = aggregate(d[,c("n2", "dist")], list(f = d$n2), min)[-1]
#mindist2 = aggregate(d[,c("n1", "dist")], list(f = d$n1), min)[-1]
return(mean(d$dist)) #mindist - pr?m?r mezi min n1 a min n2???????
} #d1 je vzor
delka = function(a){
sqrt(sum(a^2))
}
jednotkovy = function(a){
a/delka(a)
}
crossProduct <- function(ab,ac){
abci = ab[2] * ac[3] - ac[2] * ab[3];
abcj = ac[1] * ab[3] - ab[1] * ac[3];
abck = ab[1] * ac[2] - ac[1] * ab[2];
return (c(abci, abcj, abck))
}
# plocha mezi body
RovinaMeziBody = function(x,y){
if (all(x==y)){return(NA)}
bod <- as.numeric((x+y)/2)
u <-  as.numeric(x-y)
if (u[3]!=0){
a = c(1,1,-(u[1]+u[2])/u[3])
} else {
a = c(0,0,1)
}
b=crossProduct(u,a)
return(data.frame(A=bod,u=jednotkovy(a),v=jednotkovy(b)))
}
#prunik rovin
LinRov = function(df){
radky = nrow(df)
for (i in 1:(radky-1)){
if (df[i,i] == 0){
#if (all(df[i:radky,i] == 0)) return(d)
s = min(which(df[i:radky,i] != 0))+i-1
temp = df[i,]
df[i,] = df[s,]
df[s,] = temp
}
for (j in (i+1):radky){
if (df[j,i] !=0){
k = df[i,i]/df[j,i]
df[j,] = df[j,]*k - df[i,]
}
}
}
for (i in radky:2){
for (j in 1:(i-1)){
if (df[j,i] !=0){
k = df[i,i]/df[j,i]
df[j,] = df[j,]*k - df[i,]
}
}
}
for (i in 1:radky){
df[i,] = df[i,]/df[i,i]
}
return(df)
}
PrunikRovin = function(r,t){
nr = jednotkovy(crossProduct(r$u,r$v))
nt = jednotkovy(crossProduct(t$u,t$v))
u = jednotkovy(crossProduct(nr,nt))
kol = jednotkovy(crossProduct(u, nr))
df = data.frame(l1 = -kol,
k2 = t$u,
l2 = t$v,
A = r$A-t$A)
l1 = LinRov(df)$A[1]
bod = r$A + l1 * kol
return(data.frame(A = bod, u = jednotkovy(u)))
}
#transformace souradnic v rovine, pocátek A z roviny, souradnice k, l
transbod = function(bod, rov){
rovnice = data.frame(u=rov$u,v=rov$v,A=bod-rov$A)
if(all(rovnice[1,] == 0)){
rovnice[1,] = rovnice[3,]
}
s = min(which(rovnice[1,] != 0))
if( all(round(rovnice[2,] - rovnice[1,] * rovnice[2,s] / rovnice[1,s], 8) == 0) ){
rovnice[2,] = rovnice[3,]
rovnice = rovnice[-3,]
}
rovnice = LinRov(rovnice[1:2,])
return(rovnice$A[1:2])
}
transVekt = function(vekt, rov){
rovnice = data.frame(u=rov$u,v=rov$v,A=vekt)
if(all(rovnice[1,] == 0)){
rovnice[1,] = rovnice[3,]
}
s = min(which(rovnice[1,] != 0))
if( all(round(rovnice[2,] - rovnice[1,] * rovnice[2,s] / rovnice[1,s], 10) == 0) ){
rovnice[2,] = rovnice[3,]
rovnice = rovnice[-3,]
}
rovnice = LinRov(rovnice[1:2,])
return(rovnice$A[1:2])
}
zobrazVekt = function(vekt, rov){
rovnice = data.frame(
u=rov$u,
v=rov$v,
n=crossProduct(rov$u,rov$v),
A=vekt
)
rovnice = LinRov(rovnice)
return(rovnice$A[1:2])
}
################################################# voronoi
voronoi = function(body, poz1, poz2){
body = body[c(poz1, poz2, c(1:nrow(body))[-c(poz1, poz2)]),]
ZaklRov = RovinaMeziBody(body[1,],body[2,])
# nv = c(body[2,2]-body[1,2], body[1,1]-body[2,1])
primky = list()
for (i in 3:nrow(body)) {#nrow(body)
#print(paste("i=",i))
pridat = TRUE
mazat = NULL
tf = TRUE
temp = (body[i,1] - body[2,1]) / (body[1,1] - body[2,1])
if (is.na(temp)){
temp = (body[i,2] - body[2,2]) / (body[1,2] - body[2,2])
if(is.na(temp)){
temp = (body[i,3] - body[2,3]) / (body[1,3] - body[2,3])
}
}
if (all(round(body[i,],10) == round(body[2,] + temp * (body[1,] - body[2,]),10))){
if (between(temp,0,1)){
return(list())
} else{
pridat = FALSE
tf = FALSE
}
}
if (tf) {
r = RovinaMeziBody(body[1,],body[i,])
p = PrunikRovin(r,ZaklRov)
p1 = list(
rovnice = data.frame(
A = transbod(p$A, ZaklRov),
u = transVekt(p$u, ZaklRov)
),
smer = zobrazVekt(body[1,] - body[i,], ZaklRov),
omezeni = c(-Inf,Inf)
)
if (length(primky)>0) {
pr1 = p1$rovnice
s1 = p1$smer
for (j in 1:length(primky)){
#print(j)
pr2 = primky[[j]]$rovnice
s2 = primky[[j]]$smer
k =
data.frame(
u1 = pr1$u,
u2 = -pr2$u,
A = pr2$A - pr1$A
) %>%
LinRov() %>%
select(A)
#prunik = pr1$A+ k[1,1] * pr1$u
#########################################################################################################
cosinus = round(sum(pr1$u * s2),10) #/(sqrt(sum(pr1$u^2))*sqrt(sum(s2^2))) # lze odd?lat celou ??st za lomitkem
#uhel mezi n2 a u1 < 90stupnu. (cos vetsi nez 1) potom min, jinak max
#pokud provno 0, potom rovnobezky
if (cosinus < 0 & k$A[1] > p1$omezeni[1]){
p1$omezeni[2] = min(p1$omezeni[2], k$A[1])
} else if (cosinus > 0 & k$A[1] < p1$omezeni[2]){
p1$omezeni[1] = max(p1$omezeni[1], k$A[1])
} else if (cosinus !=0) {
#if ((cosinus > 0 & k$A[1] > p1$omezeni[2]) | (cosinus < 0 & k$A[1] < p1$omezeni[1])){
pridat = FALSE
#}
} else {
l = data.frame(
n = s1,
u = pr1$u,
A = pr2$A - pr1$A
) %>%
LinRov()
temp = min(which(s2!=0))
if(s1[temp]/s2[temp] >= 0){
if (l$A[1] >= 0) {
pridat = FALSE
break
} else {
mazat = c(mazat, j)
}
} else {
if (l$A[1] < 0) {
return(list())
}
}
}
############################################################################################
cosinus = round(sum(pr2$u * s1),10) #/(sqrt(sum(pr2$u^2))*sqrt(sum(s1^2))) #####PRO ZJEDNODUSENI NUTNE POUZE ZNAMENKO STACI CITATEL!!!
# if(cosinus == -1) cosinus = 1
if (cosinus < 0 & k$A[2] > primky[[j]]$omezeni[1]){
primky[[j]]$omezeni[2] = min(primky[[j]]$omezeni[2], k$A[2])
} else if (cosinus > 0 & k$A[2] < primky[[j]]$omezeni[2]){
primky[[j]]$omezeni[1] = max(primky[[j]]$omezeni[1], k$A[2])
} else if (cosinus !=0) {
mazat = c(mazat, j)
}
######################################################################################################
}
}
}
if (!is.null(mazat)){
primky = primky[-mazat]
}
if(pridat){
primky = list.append(primky,p1)
}
if (length(primky) == 0 & tf) return(list())
#print(paste("delka",length(primky)))
}
if (length(primky) != 0){
mazat = NULL
for (i in 1:length(primky)) {
if (diff(primky[[i]]$omezeni) < 0.0000001)
mazat = c(mazat, i)
}
if (!is.null(mazat)){
primky = primky[-mazat]
}
}
return(primky)
}
obsah = function(primky){
if (length(primky) == 0) return(0)
vrcholy = data.frame()
for (i in 1:length(primky)){
for (j in 1:2){
k = primky[[i]]$rovnice$A+primky[[i]]$omezeni[j]*primky[[i]]$rovnice$u
vrcholy = rbind(vrcholy, k)
}
}
colnames(vrcholy) = c("x","y")
if (any(is.na(vrcholy) | abs(vrcholy) == Inf)){
s = Inf
} else {
vrcholy = round(vrcholy, 10)
minx = min(vrcholy$x)
x = min(which(vrcholy$x == minx))
poradi = c(x)
for (i in 1:(nrow(vrcholy)/2)) {
x = x + ((x%%2) -0.5)*2
stejny = which(vrcholy$x == vrcholy$x[x] & vrcholy$y == vrcholy$y[x])
x = stejny[stejny != x]
poradi = c(poradi, x)
}
while (poradi[1] != poradi[length(poradi)]) poradi = poradi[-length(poradi)]
vrcholy = vrcholy[poradi,]
miny = min(vrcholy$y)
maxx = max(vrcholy$x)
konec = min(which(vrcholy$x == maxx))
s1 = 0
for (i in 2:konec){
s1 = s1 + ((vrcholy$y[i-1] + vrcholy$y[i] ) / 2 - miny) * (vrcholy$x[i] - vrcholy$x[i-1])
}
s2 = 0
for (i in (1+konec):nrow(vrcholy)){
s2 = s2 + ((vrcholy$y[i-1] + vrcholy$y[i]) / 2 - miny) * (vrcholy$x[i-1] - vrcholy$x[i])
}
s = abs(s1-s2)
}
return(s)
}
################################################# nas pripad
dvojice = function(df){
nazvy = colnames(df)[-1]
f = strsplit(nazvy,"[..]")
t = data.frame(x=f[[1]][1], y=f[[1]][3])
for (i in 2:length(f)) t = rbind(t,data.frame(x=f[[i]][1],y=f[[i]][3]))
return(t)
}
obspodgr = function(x,y){ # x je energie
s = 0
for (i in 2:length(x)){
s = s + ((y[i-1] + y[i]) / 2) * (x[i] - x[i-1])
}
return(s)
}
